In [204]:
import numpy as np
import pandas as pd
import yfinance as yf

## Read in and clean TikTok data

In [None]:
# Load the scraped TikTok data from the CSV export.
df = pd.read_csv("athleisure.csv")

# Add a column for diggs-to-plays ratio.
# Rows with playCount == 0 have no meaningful ratio. Masking the zero
# denominator yields NaN for them instead of +/-inf (or a divide-by-zero
# artefact), so a single bad row cannot poison later sums or percent changes.
df['diggCount/playCount'] = df['diggCount'] / df['playCount'].replace(0, np.nan)

# Keep only the following columns; delete all other columns
df = df[['createTimeISO', 'diggCount', 'playCount', 'diggCount/playCount', 'shareCount', 'commentCount', 'videoMeta/duration']]

# Sort by time (createTimeISO is still the raw CSV value at this point,
# so this is the ordering pandas gives that column as loaded).
df = df.sort_values(by='createTimeISO')

df

## Get and clean stock data

In [None]:
# Fetch the price history of one stock through yfinance.
ticker = "LULU"
data = yf.Ticker(ticker)

# Date window of the download; edit both bounds to change the time interval.
history_window = {"start": '2019-10-01', "end": '2024-11-16'}

# `prices` is a dataframe holding the rows returned for that window.
prices = data.history(**history_window)

prices

## Merging data

In [None]:
# Get y-m-d date of TikTok dataframe
df['createTimeISO'] = pd.to_datetime(df['createTimeISO'])
df['date'] = df['createTimeISO'].dt.strftime('%Y-%m-%d')

# Create cleaned_df with select columns of TikTok data
cleaned_df = df[['date', 'diggCount', 'playCount', 'diggCount/playCount']]

# Create aggregated_df, aggregating the counts for videos made on the same day.
# Ex:
# date                  diggCount
# 2019-11-01            200
# 2019-11-01            150
# will become:
# date                  diggCount
# 2019-11-01            350
aggregated_df = cleaned_df.groupby('date', as_index = False)[['diggCount', 'playCount']].sum()

# The daily ratio is recomputed from the daily totals. Summing the per-video
# ratios instead would make the value grow with the number of videos posted
# that day, so it would no longer be a diggs-to-plays ratio.
# Days with zero total plays get NaN rather than +/-inf.
aggregated_df['diggCount/playCount'] = (
    aggregated_df['diggCount'] / aggregated_df['playCount'].replace(0, np.nan)
)

# Turn the index of `prices` into a regular `Date` column.
# Guarded so that re-running this cell is a no-op: an unconditional
# reset_index would add an extra `index` / `level_0` column on every re-run
# and eventually raise.
if 'Date' not in prices.columns:
    prices = prices.reset_index()

# Closing price per calendar day, keyed by the same 'YYYY-MM-DD' string format
# as `aggregated_df['date']` so both frames can be joined on one explicit column.
daily_close = prices[['Date', 'Close']].assign(
    date = pd.to_datetime(prices['Date']).dt.strftime('%Y-%m-%d')
)

# Merge `aggregated_df` and the daily closes, based on dates
# (use inner merge - only include days that are present in both dataframes)
merged_df = pd.merge(
    aggregated_df,
    daily_close[['date', 'Close']],
    on = 'date',
    how = 'inner'
    )

# Only keep relevant columns (use closing prices of stock for now)
merged_df = merged_df[['date', 'diggCount', 'playCount', 'diggCount/playCount', 'Close']]

# Calculate percentage change in diggCount/playCount relative to the previous row.
# fill_method=None: do not forward-fill missing ratios before differencing, so
# a day without a usable ratio yields NaN (no signal) instead of a fake 0% change.
# Passing it explicitly also keeps the result the same across pandas versions.
merged_df['change'] = merged_df['diggCount/playCount'].pct_change(fill_method = None)

merged_df

## Strategy (daily)

In [208]:
class Strategy:
    """All-in / all-out trading simulation driven by the `change` column.

    `data` must provide the columns this class reads: `date`, `Close` and
    `change`. Every row after the first is visited in order; the whole
    balance is converted to shares when `change` exceeds a threshold, and the
    whole position is sold otherwise.
    """

    def __init__(self, data, balance = 1000000):
        """Store the input frame and start with `balance` cash and no shares."""
        self.data = data
        self.balance = balance      # cash currently held
        self.shares = 0             # shares currently held (can be fractional)
        self.transactions = 0       # number of executed buys and sells

    # Helper function that gets bid/ask price
    def get_price(self, date):
        """Return `Close` of the first row whose `date` equals `date`, or None if there is none."""
        row = self.data[self.data['date'] == date]
        if row.empty:
            return None
        return row['Close'].values[0]

    # Price that can actually be traded on, or None
    def _usable_price(self, date):
        """Return the price for `date`, or None when it is missing, NaN or not positive."""
        price = self.get_price(date)
        # `not price > 0` is also True for NaN, which a plain `<= 0` test would miss.
        if price is None or not price > 0:
            return None
        return price

    # Buy as many shares as possible with remaining balance
    def buy(self, date):
        """Spend the entire balance on shares at the price of `date`.

        Does nothing (apart from printing a message) when the balance is not
        positive or when no usable price exists for `date`.
        """
        if self.balance <= 0:
            print("Balance too low to buy")
            return

        bid = self._usable_price(date)
        if bid is None:
            # Previously a missing date raised TypeError on the division below.
            print(f"No usable price for {date}; buy skipped")
            return

        bought = self.balance / bid
        # Add to the position instead of overwriting it, so shares that are
        # already held are never silently discarded.
        self.shares += bought
        self.balance = 0
        self.transactions += 1
        # Printed after the update so Balance/Shares show the post-trade state.
        print(f"Bought {bought} shares at {bid} \n Balance: {self.balance}    Shares: {self.shares}")

    # Sell as many shares as possible given ask price
    def sell(self, date):
        """Sell the entire position at the price of `date`.

        Does nothing (apart from printing a message) when no shares are held
        or when no usable price exists for `date`.
        """
        if self.shares <= 0:
            print("No shares to sell")
            return

        ask = self._usable_price(date)
        if ask is None:
            # Previously a missing date raised TypeError on the multiplication below.
            print(f"No usable price for {date}; sell skipped")
            return

        sold = self.shares
        self.balance += sold * ask
        self.shares = 0
        self.transactions += 1
        # Printed after the update so Balance/Shares show the post-trade state.
        print(f"Sold {sold} shares at {ask} \n Balance: {self.balance}    Shares: {self.shares}")

    # Compare change against the threshold, row by row
    def run(self, threshold = 5.0):
        """Walk through `data` and trade on each row, printing what happened.

        Row 0 is skipped. For every later row the date is printed, then:
        `change > threshold` triggers `buy`, anything else (including a NaN
        change) triggers `sell`. A summary is printed at the end.

        threshold: minimum value of `change` that triggers a buy. The default
            of 5.0 is the arbitrarily chosen value this strategy started with.
        """
        dates = self.data['date']
        changes = self.data['change']

        for i in range(1, len(self.data)):
            date = dates.iloc[i]

            # Print date
            print(date, end = ": ")

            if changes.iloc[i] > threshold:
                self.buy(date)
            else:
                self.sell(date)

        print(f"Ending balance: {self.balance}")
        print(f"Ending shares: {self.shares}")
        print(f"Number of transactions: {self.transactions}")

## Run strategy

In [None]:
# Simulate the strategy on the merged TikTok/price frame with a starting balance of 1,000,000.
strategy = Strategy(data=merged_df, balance=1_000_000)
strategy.run()

## Notes
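- **Problems:** the scraper output is unreliable; the percent-change calculation is off because we don't have TikTok data for every day, so consecutive rows can be several days apart; etc.
- **Possible modifications:** set different thresholds for when to buy/sell, buy/sell different amounts instead of the full balance/position, and possibly add a transaction fee.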