In [234]:
import numpy as np
import pandas as pd
import yfinance as yf

## Read in and clean TikTok data

In [235]:
df = pd.read_csv("athleisure.csv")

# Add a column for diggs-to-plays ratio
df['diggCount/playCount'] = df['diggCount'] / df['playCount']

# Keep only the following columns; delete all other columns
df = df[['createTimeISO','diggCount', 'playCount', 'diggCount/playCount', 'shareCount', 'commentCount', 'videoMeta/duration']]

# df['means'] = df[['diggCount', 'shareCount', 'playCount', 'commentCount', 'videoMeta/duration']].mean(axis=1)

# Sort by time
df = df.sort_values(by='createTimeISO')

df

Unnamed: 0,createTimeISO,diggCount,playCount,diggCount/playCount,shareCount,commentCount,videoMeta/duration
7,2019-03-08T22:15:23.000Z,529,62400,0.008478,11,6,15
8,2019-04-27T15:29:13.000Z,376,55700,0.006750,3,3,15
5,2019-11-11T23:56:53.000Z,6222,253000,0.024593,374,70,26
14,2019-11-20T18:10:32.000Z,608,51100,0.011898,14,8,15
6,2019-11-21T18:18:44.000Z,645,71100,0.009072,11,2,15
...,...,...,...,...,...,...,...
47,2024-11-14T01:19:28.000Z,2,108,0.018519,0,1,31
23,2024-11-14T01:27:04.000Z,36,1671,0.021544,0,1,37
99,2024-11-14T01:57:14.000Z,4,99,0.040404,0,0,98
210,2024-11-14T02:01:58.000Z,12,423,0.028369,0,0,75


## Get and clean stock data

In [236]:
# Get prices of specified stock for each day
ticker = "LULU"
data = yf.Ticker(ticker)
prices = data.history(start = '2019-10-01', end = '2024-11-16') # can change time interval; prices is a dataframe
prices

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-10-01 00:00:00-04:00,193.410004,194.070007,191.320007,192.690002,1317200,0.0,0.0
2019-10-02 00:00:00-04:00,190.919998,191.750000,187.119995,189.130005,1351900,0.0,0.0
2019-10-03 00:00:00-04:00,188.759995,190.300003,185.360001,190.149994,1382800,0.0,0.0
2019-10-04 00:00:00-04:00,190.449997,193.429993,189.949997,193.119995,1193700,0.0,0.0
2019-10-07 00:00:00-04:00,191.919998,193.399994,190.809998,191.660004,1162200,0.0,0.0
...,...,...,...,...,...,...,...
2024-11-11 00:00:00-05:00,310.630005,320.700012,308.619995,318.459991,2064000,0.0,0.0
2024-11-12 00:00:00-05:00,317.000000,322.250000,314.119995,320.899994,1159200,0.0,0.0
2024-11-13 00:00:00-05:00,323.709991,334.350006,321.739990,328.299988,2001000,0.0,0.0
2024-11-14 00:00:00-05:00,328.679993,334.579987,324.529999,330.260010,1579600,0.0,0.0


## Merging data

In [237]:
# Get y-m-d date of TikTok dataframe
df['createTimeISO'] = pd.to_datetime(df['createTimeISO'])
df['date'] = df['createTimeISO'].dt.strftime('%Y-%m-%d')

# Create cleaned_df with select columns of TikTok data
cleaned_df = df[['date', 'diggCount', 'playCount', 'diggCount/playCount']]

# Create aggregated_df, aggregating the metadata for videos made on the same day.
# Ex:
# date                  diggCount
# 2019-11-01            200
# 2019-11-01            150
# will become:
# date                  diggCount
# 2019-11-01            350
aggregated_df = cleaned_df.groupby('date', as_index = False)[['diggCount', 'playCount', 'diggCount/playCount']].sum()

aggregated_df

# Reset indices of `prices`, dropping old indices. Keep only the date in the `Date` column of the prices dataframe
prices.reset_index(inplace=True)


# print(cleaned_df.head(5))
# print(prices.head(5))

# print("cleaned_df dtypes: \n" + cleaned_df.dtypes)
# print("prices dtypes: \n" + prices.dtypes)


# Merge `cleaned_df` and `prices`, based on dates (use inner merge - only include data that is common to both dataframes)
merged_df = pd.merge(
    aggregated_df,
    prices[['Date', 'Close']],
    left_on = pd.to_datetime(aggregated_df['date']).dt.date,
    right_on = pd.to_datetime(prices['Date']).dt.date,
    how = 'inner'
    )

# Only keep relevant columns (use closing prices of stock for now)
merged_df = merged_df[['date', 'diggCount', 'playCount', 'diggCount/playCount', 'Close']]

# Calculate percentage change in diggCount/playCount 
merged_df['change'] = merged_df['diggCount/playCount'].pct_change()

merged_df

Unnamed: 0,date,diggCount,playCount,diggCount/playCount,Close,change
0,2019-11-11,6222,253000,0.024593,207.190002,
1,2019-11-20,608,51100,0.011898,217.039993,-0.516192
2,2019-11-21,5451,184600,0.051415,216.740005,3.321257
3,2019-12-31,424,48100,0.008815,231.669998,-0.828554
4,2020-01-02,45700,684900,0.066725,233.419998,6.569518
...,...,...,...,...,...,...
146,2024-11-06,277,552900,0.000501,313.140015,-0.970680
147,2024-11-07,57,21900,0.002603,315.299988,4.195144
148,2024-11-12,3702,158655,0.506553,320.899994,193.622915
149,2024-11-13,747,19739,1.230954,328.299988,1.430061


## Strategy (daily)

In [238]:
class Strategy:
    def __init__(self, data, balance = 1000000):
        self.data = data
        self.balance = balance
        self.shares = 0
        self.transactions = 0

    # Helper function that gets bid/ask price
    def get_price(self, date):
        row = self.data[self.data['date'] == date]
        if not row.empty:
            return row['Close'].values[0]
        else:
            return None
            

    # Buy as many shares as possible with remaining balance
    def buy(self, date):
        if self.balance > 0:
            bid = self.get_price(date)
            self.shares = self.balance / bid 
            print(f"Bought {self.shares} shares at {bid} \n Balance: {self.balance}    Shares: {self.shares}")
            self.balance = 0
            self.transactions += 1
        else:
            print("Balance too low to buy") 

    # Sell as many shares as possible given ask pricce
    def sell(self, date):
        if self.shares > 0:
            ask = self.get_price(date)
            self.balance += self.shares * ask
            print(f"Sold {self.shares} shares at {ask} \n Balance: {self.balance}    Shares: {self.shares}")
            self.shares = 0
            self.transactions += 1
        else:
            print("No shares to sell") 
            
    # Compare change 
    def run(self):
        print(f"Starting balance: {self.balance}")
        print(f"Starting shares: {self.shares}")
        print(f"Number of transactions: {self.transactions} \n")
        
        # Iterate through each row in `data` and print the date, followed by the action taken (buy/sell) and the results
        for i in range(1, len(self.data)): 
            # Print date
            print(self.data['date'].iloc[i], end = ": ")

            # If the change in diggCount/playCount is larger than 5 (arbitrarily set)
            if self.data['change'].iloc[i] > 5.0:
                self.buy(self.data['date'].iloc[i])
            else:
                self.sell(self.data['date'].iloc[i])
            
        print(f"\nEnding balance: {self.balance}")
        print(f"Ending shares: {self.shares}")
        print(f"Number of transactions: {self.transactions}")

## Run strategy

In [239]:
strategy = Strategy(merged_df, balance = 1000000)
strategy.run()

Starting balance: 1000000
Starting shares: 0
Number of transactions: 0 

2019-11-20: No shares to sell
2019-11-21: No shares to sell
2019-12-31: No shares to sell
2020-01-02: Bought 4284.123073620357 shares at 233.4199981689453 
 Balance: 1000000    Shares: 4284.123073620357
2020-01-03: Sold 4284.123073620357 shares at 232.63999938964844 
 Balance: 996658.3892322186    Shares: 4284.123073620357
2020-01-06: No shares to sell
2020-01-14: No shares to sell
2020-01-21: No shares to sell
2020-01-22: Bought 4079.6495160954414 shares at 244.3000030517578 
 Balance: 996658.3892322186    Shares: 4079.6495160954414
2020-01-23: Sold 4079.6495160954414 shares at 243.05999755859375 
 Balance: 991599.6014220761    Shares: 4079.6495160954414
2020-01-24: No shares to sell
2021-03-02: No shares to sell
2021-08-04: Bought 2415.76631294161 shares at 410.4700012207031 
 Balance: 991599.6014220761    Shares: 2415.76631294161
2021-09-09: Sold 2415.76631294161 shares at 420.7099914550781 
 Balance: 1016337.0

## Notes
problems: scraper is weird, percent change calculation is off bc we don't have data every day (jumps multiple days), etc
modifications: set different thresholds for when to buy/sell, set diff amounts for buying/selling, transaction fee (?), buying non-whole shares