In [9]:
import numpy as np
import pandas as pd
import yfinance as yf

## Read in and clean TikTok data

In [10]:
df = pd.read_csv("loungewear.csv")

# Add a column for diggs-to-plays ratio
df['diggCount/playCount'] = df['diggCount'] / df['playCount']

# Keep only the following columns; delete all other columns
df = df[['createTimeISO','diggCount', 'playCount', 'diggCount/playCount', 'shareCount', 'commentCount', 'videoMeta/duration']]

# df['means'] = df[['diggCount', 'shareCount', 'playCount', 'commentCount', 'videoMeta/duration']].mean(axis=1)

# Sort by time
df = df.sort_values(by='createTimeISO')

df

Unnamed: 0,createTimeISO,diggCount,playCount,diggCount/playCount,shareCount,commentCount,videoMeta/duration
59,2021-04-11T21:57:13.000Z,15,1327,0.011304,1,6,15
31,2021-05-02T22:58:49.000Z,74,5641,0.013118,1,2,20
227,2021-09-20T19:12:55.000Z,241800,1500000,0.161200,717,666,22
105,2022-05-04T22:42:17.000Z,1,126,0.007937,0,0,44
179,2022-05-20T16:18:00.000Z,602,113700,0.005295,6,14,32
...,...,...,...,...,...,...,...
255,2024-11-23T15:27:17.000Z,0,237,0.000000,0,1,10
69,2024-11-23T15:47:13.000Z,4,236,0.016949,0,0,9
64,2024-11-23T15:54:49.000Z,1,77,0.012987,0,0,24
65,2024-11-23T15:55:46.000Z,4,114,0.035088,0,1,33


## Get and clean stock data

In [11]:
# Get prices of specified stock for each day
ticker = "VSCO"
data = yf.Ticker(ticker)
prices = data.history(start = '2019-10-01', end = '2024-11-16') # can change time interval; prices is a dataframe
prices

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-07-21 00:00:00-04:00,55.000000,55.000000,39.994999,42.500000,96100,0.0,0.0
2021-07-22 00:00:00-04:00,42.750000,42.750000,39.790001,40.900002,401200,0.0,0.0
2021-07-23 00:00:00-04:00,41.980000,42.200001,40.990002,42.139999,114400,0.0,0.0
2021-07-26 00:00:00-04:00,40.200001,41.500000,40.060001,41.200001,22000,0.0,0.0
2021-07-27 00:00:00-04:00,41.500000,41.520000,41.099998,41.435001,83000,0.0,0.0
...,...,...,...,...,...,...,...
2024-11-11 00:00:00-05:00,35.580002,35.652000,33.849998,34.970001,2243300,0.0,0.0
2024-11-12 00:00:00-05:00,35.500000,37.616001,34.980000,35.779999,2327200,0.0,0.0
2024-11-13 00:00:00-05:00,36.130001,36.660000,35.599998,35.939999,1655600,0.0,0.0
2024-11-14 00:00:00-05:00,35.840000,36.875000,35.720001,36.040001,1269400,0.0,0.0


## Merging data

In [12]:
# Get y-m-d date of TikTok dataframe
df['createTimeISO'] = pd.to_datetime(df['createTimeISO'])
df['date'] = df['createTimeISO'].dt.strftime('%Y-%m-%d')

# Create cleaned_df with select columns of TikTok data
cleaned_df = df[['date', 'diggCount', 'playCount', 'diggCount/playCount']]

# Create aggregated_df, aggregating the metadata for videos made on the same day.
# Ex:
# date                  diggCount
# 2019-11-01            200
# 2019-11-01            150
# will become:
# date                  diggCount
# 2019-11-01            350
aggregated_df = cleaned_df.groupby('date', as_index = False)[['diggCount', 'playCount', 'diggCount/playCount']].sum()

aggregated_df

# Reset indices of `prices`, dropping old indices. Keep only the date in the `Date` column of the prices dataframe
prices.reset_index(inplace=True)


# print(cleaned_df.head(5))
# print(prices.head(5))

# print("cleaned_df dtypes: \n" + cleaned_df.dtypes)
# print("prices dtypes: \n" + prices.dtypes)


# Merge `cleaned_df` and `prices`, based on dates (use inner merge - only include data that is common to both dataframes)
merged_df = pd.merge(
    aggregated_df,
    prices[['Date', 'Close']],
    left_on = pd.to_datetime(aggregated_df['date']).dt.date,
    right_on = pd.to_datetime(prices['Date']).dt.date,
    how = 'inner'
    )

# Only keep relevant columns (use closing prices of stock for now)
merged_df = merged_df[['date', 'diggCount', 'playCount', 'diggCount/playCount', 'Close']]

# Calculate percentage change in diggCount/playCount 
merged_df['change'] = merged_df['diggCount/playCount'].pct_change()

merged_df

Unnamed: 0,date,diggCount,playCount,diggCount/playCount,Close,change
0,2021-09-20,241800,1500000,0.161200,60.330002,
1,2022-05-04,1,126,0.007937,50.740002,-0.950766
2,2022-05-20,602,113700,0.005295,43.330002,-0.332876
3,2022-10-19,581,79900,0.007272,36.209999,0.373388
4,2022-11-01,1924,150900,0.012750,38.799999,0.753422
...,...,...,...,...,...,...
107,2024-11-11,141837,1570033,0.287078,34.970001,2.074406
108,2024-11-12,3071,92512,0.107938,35.779999,-0.624011
109,2024-11-13,294,31400,0.009363,35.939999,-0.913255
110,2024-11-14,92,1697,0.054213,36.040001,4.790130


## Strategy (daily)

In [13]:
class Strategy:
    def __init__(self, data, balance = 1000000):
        self.data = data
        self.balance = balance
        self.shares = 0
        self.transactions = 0

    # Helper function that gets bid/ask price
    def get_price(self, date):
        row = self.data[self.data['date'] == date]
        if not row.empty:
            return row['Close'].values[0]
        else:
            return None
            

    # Buy as many shares as possible with remaining balance
    def buy(self, date):
        if self.balance > 0:
            bid = self.get_price(date)
            self.shares = self.balance / bid 
            print(f"Bought {self.shares} shares at {bid} \n Balance: {self.balance}    Shares: {self.shares}")
            self.balance = 0
            self.transactions += 1
        else:
            print("Balance too low to buy") 

    # Sell as many shares as possible given ask pricce
    def sell(self, date):
        if self.shares > 0:
            ask = self.get_price(date)
            self.balance += self.shares * ask
            print(f"Sold {self.shares} shares at {ask} \n Balance: {self.balance}    Shares: {self.shares}")
            self.shares = 0
            self.transactions += 1
        else:
            print("No shares to sell") 
            
    # Compare change 
    def run(self):
        print(f"Starting balance: {self.balance}")
        print(f"Starting shares: {self.shares}")
        print(f"Number of transactions: {self.transactions} \n")
        
        # Iterate through each row in `data` and print the date, followed by the action taken (buy/sell) and the results
        for i in range(1, len(self.data)): 
            # Print date
            print(self.data['date'].iloc[i], end = ": ")

            # If the change in diggCount/playCount is larger than 5 (arbitrarily set)
            if self.data['change'].iloc[i] > 2.5:
                self.buy(self.data['date'].iloc[i])
            else:
                self.sell(self.data['date'].iloc[i])
            
        print(f"\nEnding balance: {self.balance}")
        print(f"Ending shares: {self.shares}")
        print(f"Number of transactions: {self.transactions}")

## Run strategy

In [14]:
strategy = Strategy(merged_df, balance = 1000000)
strategy.run()

Starting balance: 1000000
Starting shares: 0
Number of transactions: 0 

2022-05-04: No shares to sell
2022-05-20: No shares to sell
2022-10-19: No shares to sell
2022-11-01: No shares to sell
2022-12-29: No shares to sell
2023-03-13: Bought 32446.46365677725 shares at 30.81999969482422 
 Balance: 1000000    Shares: 32446.46365677725
2023-03-24: Sold 32446.46365677725 shares at 31.850000381469727 
 Balance: 1033419.8798456991    Shares: 32446.46365677725
2023-03-30: No shares to sell
2023-04-11: Bought 30385.766858194645 shares at 34.0099983215332 
 Balance: 1033419.8798456991    Shares: 30385.766858194645
2023-04-17: Sold 30385.766858194645 shares at 33.13999938964844 
 Balance: 1006984.2951345702    Shares: 30385.766858194645
2023-06-13: No shares to sell
2023-07-07: No shares to sell
2023-08-07: No shares to sell
2023-08-14: No shares to sell
2023-08-29: No shares to sell
2023-09-12: No shares to sell
2023-10-10: No shares to sell
2023-10-11: Bought 67447.03781321955 shares at 14.93

## Notes
problems: scraper is weird, percent change calculation is off bc we don't have data every day (jumps multiple days), etc
modifications: set different thresholds for when to buy/sell, set diff amounts for buying/selling, transaction fee (?), buying non-whole shares