In [48]:
import numpy as np
import pandas as pd
import yfinance as yf

## Read in and clean TikTok data

In [49]:
df = pd.read_csv("athleisure.csv")

# Add a column for diggs-to-plays ratio
df['diggCount/playCount'] = df['diggCount'] / df['playCount']

# Sort by time
df = df.sort_values(by='createTimeISO')

# Get date in y-m-d format from `df`
df['createTimeISO'] = pd.to_datetime(df['createTimeISO'])
df['date'] = df['createTimeISO'].dt.strftime('%Y-%m-%d')

# Create `cleaned_df`, keeping only the following columns of `df`
cleaned_df = df[['date', 'diggCount', 'playCount', 'diggCount/playCount']]

# Create `aggregated_df`, aggregating metadata for videos made on the same day.
# Ex:
# date                  diggCount
# 2019-11-01            200
# 2019-11-01            150
# will become:
# date                  diggCount
# 2019-11-01            350
aggregated_df = cleaned_df.groupby('date', as_index = False)[['diggCount', 'playCount', 'diggCount/playCount']].sum()

aggregated_df['date'] = pd.to_datetime(aggregated_df['date']).dt.date

aggregated_df

Unnamed: 0,date,diggCount,playCount,diggCount/playCount
0,2019-03-08,529,62400,0.008478
1,2019-04-27,376,55700,0.006750
2,2019-11-11,6222,253000,0.024593
3,2019-11-20,608,51100,0.011898
4,2019-11-21,5451,184600,0.051415
...,...,...,...,...
193,2024-11-06,277,552900,0.000501
194,2024-11-07,57,21900,0.002603
195,2024-11-12,3702,158655,0.506553
196,2024-11-13,747,19739,1.230954


## Get and clean stock data

In [50]:
# Get prices of specified stock for each day
ticker = "LULU"
data = yf.Ticker(ticker)
prices = data.history(start = '2019-10-01', end = '2024-11-16') # can change time interval; prices is a dataframe

# Reset indices of `prices`, dropping old indices. 
prices.reset_index(inplace=True)

prices['Date'] = pd.to_datetime(prices['Date']).dt.date

prices

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2019-10-01,193.410004,194.070007,191.320007,192.690002,1317200,0.0,0.0
1,2019-10-02,190.919998,191.750000,187.119995,189.130005,1351900,0.0,0.0
2,2019-10-03,188.759995,190.300003,185.360001,190.149994,1382800,0.0,0.0
3,2019-10-04,190.449997,193.429993,189.949997,193.119995,1193700,0.0,0.0
4,2019-10-07,191.919998,193.399994,190.809998,191.660004,1162200,0.0,0.0
...,...,...,...,...,...,...,...,...
1287,2024-11-11,310.630005,320.700012,308.619995,318.459991,2064000,0.0,0.0
1288,2024-11-12,317.000000,322.250000,314.119995,320.899994,1159200,0.0,0.0
1289,2024-11-13,323.709991,334.350006,321.739990,328.299988,2001000,0.0,0.0
1290,2024-11-14,328.679993,334.579987,324.529999,330.260010,1579600,0.0,0.0


## Merging data

In [51]:
# print(cleaned_df.head(5))
# print(prices.head(5))

# print("cleaned_df dtypes: \n" + cleaned_df.dtypes)
# print("prices dtypes: \n" + prices.dtypes)


# Merge `cleaned_df` and `prices`, based on dates (use inner merge - only include data that is common to both dataframes)
merged_df = pd.merge(
    aggregated_df,
    prices[['Date', 'Close']],
    left_on = aggregated_df['date'],
    right_on = prices['Date'],
    # left_on = pd.to_datetime(aggregated_df['date']).dt.date,
    # right_on = pd.to_datetime(prices['Date']).dt.date,
    how = 'inner'
    )

# Only keep relevant columns (use closing prices of stock for now)
merged_df = merged_df[['date', 'diggCount', 'playCount', 'diggCount/playCount', 'Close']]

# Calculate percentage change in diggCount/playCount 
merged_df['change in d/p'] = merged_df['diggCount/playCount'].pct_change()

# Calculate percentage change per time
merged_df['deltaTime'] = merged_df['date'].diff().dt.days

merged_df['change'] = merged_df['change in d/p'] / merged_df['deltaTime']

merged_df = merged_df[['date', 'change', 'Close']]

print(merged_df.dtypes)

merged_df

date       object
change    float64
Close     float64
dtype: object


Unnamed: 0,date,change,Close
0,2019-11-11,,207.190002
1,2019-11-20,-0.057355,217.039993
2,2019-11-21,3.321257,216.740005
3,2019-12-31,-0.020714,231.669998
4,2020-01-02,3.284759,233.419998
...,...,...,...
146,2024-11-06,-0.485340,313.140015
147,2024-11-07,4.195144,315.299988
148,2024-11-12,38.724583,320.899994
149,2024-11-13,1.430061,328.299988


## Strategy (daily)

In [52]:
class Strategy:
    def __init__(self, data, balance = 1000000):
        self.data = data
        self.balance = balance
        self.shares = 0
        self.transactions = 0

    # Helper function that gets bid/ask price
    def get_price(self, date):
        row = self.data[self.data['date'] == date]
        if not row.empty:
            return row['Close'].values[0]
        else:
            return None
            

    # Buy as many shares as possible with remaining balance
    def buy(self, date):
        if self.balance > 0:
            bid = self.get_price(date)
            self.shares = self.balance / bid 
            print(f"Bought {self.shares} shares at {bid} \n Balance: {self.balance}    Shares: {self.shares}")
            self.balance = 0
            self.transactions += 1
        else:
            print("Balance too low to buy") 

    # Sell as many shares as possible given ask pricce
    def sell(self, date):
        if self.shares > 0:
            ask = self.get_price(date)
            self.balance += self.shares * ask
            print(f"Sold {self.shares} shares at {ask} \n Balance: {self.balance}    Shares: {self.shares}")
            self.shares = 0
            self.transactions += 1
        else:
            print("No shares to sell") 
            
    # Compare change 
    def run(self):
        print(f"Starting balance: {self.balance}")
        print(f"Starting shares: {self.shares}")
        print(f"Number of transactions: {self.transactions} \n")
        
        # Iterate through each row in `data` and print the date, followed by the action taken (buy/sell) and the results
        for i in range(1, len(self.data)): 
            # Print date
            print(self.data['date'].iloc[i], end = ": ")

            # If the change in diggCount/playCount is larger than 6 (arbitrarily set)
            if self.data['change'].iloc[i] > 2.0:
                self.buy(self.data['date'].iloc[i])
            elif self.data['change'].iloc[i] < 0.0:
                self.sell(self.data['date'].iloc[i])
            else:
                continue
            
        print(f"\nEnding balance: {self.balance}")
        print(f"Ending shares: {self.shares}")
        print(f"Number of transactions: {self.transactions}")

## Run strategy

In [53]:
strategy = Strategy(merged_df, balance = 1000000)
strategy.run()

Starting balance: 1000000
Starting shares: 0
Number of transactions: 0 

2019-11-20: No shares to sell
2019-11-21: Bought 4613.822896814219 shares at 216.74000549316406 
 Balance: 1000000    Shares: 4613.822896814219
2019-12-31: Sold 4613.822896814219 shares at 231.6699981689453 
 Balance: 1068884.342056788    Shares: 4613.822896814219
2020-01-02: Bought 4579.232072837 shares at 233.4199981689453 
 Balance: 1068884.342056788    Shares: 4579.232072837
2020-01-03: 2020-01-06: Sold 4579.232072837 shares at 235.42999267578125 
 Balance: 1078088.5733687175    Shares: 4579.232072837
2020-01-14: No shares to sell
2020-01-21: 2020-01-22: Bought 4412.9699545697995 shares at 244.3000030517578 
 Balance: 1078088.5733687175    Shares: 4412.9699545697995
2020-01-23: Sold 4412.9699545697995 shares at 243.05999755859375 
 Balance: 1072616.466383883    Shares: 4412.9699545697995
2020-01-24: No shares to sell
2021-03-02: No shares to sell
2021-08-04: 2021-09-09: No shares to sell
2022-01-21: No shares 

## Notes
problems: scraper is weird, percent change calculation is off bc we don't have data every day (jumps multiple days), etc
modifications: set different thresholds for when to buy/sell, set diff amounts for buying/selling, transaction fee (?), buying non-whole shares