# Import Libraries

In [1]:
import pandas as pd
import re
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

# Lift pandas' display truncation so frames are rendered with every row and column
for _display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(_display_option, None)

# Import Interview Results

In [2]:
def load_text_file(file_path: str, config_dir: str = "../config") -> list:
    """
    Load search terms for market signals or a profile list from a text file.

    Each line of the file becomes one entry, with surrounding whitespace removed.
    Lines that are empty after stripping are skipped so they cannot show up as
    "" entries in the returned list.

    Args:
        file_path (str): Name of the text file (one search term/profile per line),
            resolved relative to ``config_dir``.
        config_dir (str): Directory that holds the text file. Defaults to
            "../config", the location this notebook has always read from.

    Returns:
        list: The non-empty search terms/profiles as strings, in file order.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    full_file_path = f"{config_dir}/{file_path}"
    # "utf-8-sig" reads plain UTF-8 and additionally swallows a leading BOM, which
    # would otherwise stay glued to the first entry and stop it from ever matching.
    with open(full_file_path, "r", encoding="utf-8-sig") as file:
        stripped_lines = (line.strip() for line in file)
        return [entry for entry in stripped_lines if entry]

In [None]:
# Per-profile metadata plus the identification prompts and raw LLM responses
identification_csv_path = "../data/market-signals-finfluencer/profile_metadata_post_identification.csv"
identification_results = pd.read_csv(identification_csv_path)
print(identification_results.shape)
identification_results.head()

(397, 30)


Unnamed: 0,custom_id,id,profile,profileUrl,nickName,verified,signature,bioLink,originalAvatarUrl,avatar,privateAccount,region,roomId,ttSeller,following,friends,fans,heart,video,digg,commerceUserInfo.commerceUser,commerceUserInfo.downLoadLink.android,commerceUserInfo.downLoadLink.ios,commerceUserInfo.category,commerceUserInfo.categoryButton,extractionTime,transcripts_combined,identification_user_prompt,identification_system_prompt,identification_llm_response
0,0,6870704589100942341,caprice4l,https://www.tiktok.com/@caprice4l,Caprice,False,I motivate the youth,https://linktr.ee/caprrice,https://p16-sign.tiktokcdn-us.com/tos-useast5-...,https://p16-sign.tiktokcdn-us.com/tos-useast5-...,False,US,,False,284.0,232.0,262700.0,9400000.0,592.0,0.0,False,,,,,2025-03-14 17:45:19.606972+00:00,Creation Date: 2025-02-21 01:05:09+00:00\nVide...,You will be presented with a series of questio...,You are analyzing a social media profile on Ti...,**question: Is this a finfluencer?** \n**expl...
1,1,7077615113897280518,tradingjunkies.store,https://www.tiktok.com/@tradingjunkies.store,TradingJunkies.Store,False,Learn to trade \n👇👇👇,https://tradingjunkies.store,https://p16-common-sign-va.tiktokcdn-us.com/to...,https://p16-common-sign-va.tiktokcdn-us.com/to...,False,GB,,False,1.0,1.0,884300.0,10500000.0,2109.0,0.0,True,,,Finance & Investing,False,2025-03-14 18:19:23.379150+00:00,Creation Date: 2025-03-14 17:45:45+00:00\nVide...,You will be presented with a series of questio...,You are analyzing a social media profile on Ti...,**question: Is this a finfluencer?** \n**expl...
2,2,6804917131903255558,surthycooks,https://www.tiktok.com/@surthycooks,Surthycooks,True,🇻🇪 🇱🇧 \nالسلطانة تطبخ\n\nSurthycooks@voilatale...,,https://p19-common-sign-va.tiktokcdn-us.com/to...,https://p19-common-sign-va.tiktokcdn-us.com/to...,False,VE,,False,184.0,98.0,33100000.0,959900000.0,1281.0,0.0,False,,,,,2025-02-19 14:32:05.027700+00:00,Creation Date: 2025-02-18 23:22:16+00:00\nVide...,You will be presented with a series of questio...,You are analyzing a social media profile on Ti...,**question: Is this a finfluencer?** \n**expl...
3,3,7187126075129234438,officialdanfx,https://www.tiktok.com/@officialdanfx,Dan Fx,False,“🚀 Get Funded & Start Trading Today! 👇\n📲 Tap ...,https://linktr.ee/Officialdanfx,https://p16-common-sign-va.tiktokcdn-us.com/to...,https://p16-common-sign-va.tiktokcdn-us.com/to...,False,NG,,False,208.0,23.0,125800.0,971900.0,483.0,0.0,False,,,,,2025-03-14 18:19:23.379150+00:00,Creation Date: 2025-03-14 10:55:57+00:00\nVide...,You will be presented with a series of questio...,You are analyzing a social media profile on Ti...,**question: Is this a finfluencer?** \n**expl...
4,4,7038449620599342106,modern__trading,https://www.tiktok.com/@modern__trading,MODERN.TRADING.OFFCIAL,False,📈 Professional Trader 〽️\n👇🌹Come To My Website 🔥👇,,https://p16-common-sign-va.tiktokcdn-us.com/to...,https://p16-common-sign-va.tiktokcdn-us.com/to...,False,PK,,False,3.0,1.0,72000.0,514300.0,175.0,0.0,False,,,,,2025-03-14 18:19:23.379150+00:00,Creation Date: 2025-03-14 14:21:39+00:00\nVide...,You will be presented with a series of questio...,You are analyzing a social media profile on Ti...,**question: Is this a finfluencer?** \n**expl...


In [4]:
# Reference profile lists from the config directory. Only finfluencer_list is used
# by the labelling step that follows; nonfinfluencer_list is loaded alongside it.
finfluencer_list, nonfinfluencer_list = (
    load_text_file("market_signals_finfluencer_profiles_finfluencers.txt"),
    load_text_file("market_signals_finfluencer_profiles_nonfinfluencers.txt"),
)

In [5]:
# Ground-truth label: 1 when the profile handle is in finfluencer_list, otherwise 0.
# Vectorised membership test instead of a per-row Python lambda scanning the list.
# NOTE(review): a profile that is in neither reference list is also labelled 0 here —
# confirm every profile in the CSV appears in one of the two lists.
identification_results["Finfluencer"] = (
    identification_results["profile"].isin(finfluencer_list).astype("int64")
)
print(identification_results.shape)
identification_results.head()

(397, 31)


Unnamed: 0,custom_id,id,profile,profileUrl,nickName,verified,signature,bioLink,originalAvatarUrl,avatar,privateAccount,region,roomId,ttSeller,following,friends,fans,heart,video,digg,commerceUserInfo.commerceUser,commerceUserInfo.downLoadLink.android,commerceUserInfo.downLoadLink.ios,commerceUserInfo.category,commerceUserInfo.categoryButton,extractionTime,transcripts_combined,identification_user_prompt,identification_system_prompt,identification_llm_response,Finfluencer
0,0,6870704589100942341,caprice4l,https://www.tiktok.com/@caprice4l,Caprice,False,I motivate the youth,https://linktr.ee/caprrice,https://p16-sign.tiktokcdn-us.com/tos-useast5-...,https://p16-sign.tiktokcdn-us.com/tos-useast5-...,False,US,,False,284.0,232.0,262700.0,9400000.0,592.0,0.0,False,,,,,2025-03-14 17:45:19.606972+00:00,Creation Date: 2025-02-21 01:05:09+00:00\nVide...,You will be presented with a series of questio...,You are analyzing a social media profile on Ti...,**question: Is this a finfluencer?** \n**expl...,0
1,1,7077615113897280518,tradingjunkies.store,https://www.tiktok.com/@tradingjunkies.store,TradingJunkies.Store,False,Learn to trade \n👇👇👇,https://tradingjunkies.store,https://p16-common-sign-va.tiktokcdn-us.com/to...,https://p16-common-sign-va.tiktokcdn-us.com/to...,False,GB,,False,1.0,1.0,884300.0,10500000.0,2109.0,0.0,True,,,Finance & Investing,False,2025-03-14 18:19:23.379150+00:00,Creation Date: 2025-03-14 17:45:45+00:00\nVide...,You will be presented with a series of questio...,You are analyzing a social media profile on Ti...,**question: Is this a finfluencer?** \n**expl...,1
2,2,6804917131903255558,surthycooks,https://www.tiktok.com/@surthycooks,Surthycooks,True,🇻🇪 🇱🇧 \nالسلطانة تطبخ\n\nSurthycooks@voilatale...,,https://p19-common-sign-va.tiktokcdn-us.com/to...,https://p19-common-sign-va.tiktokcdn-us.com/to...,False,VE,,False,184.0,98.0,33100000.0,959900000.0,1281.0,0.0,False,,,,,2025-02-19 14:32:05.027700+00:00,Creation Date: 2025-02-18 23:22:16+00:00\nVide...,You will be presented with a series of questio...,You are analyzing a social media profile on Ti...,**question: Is this a finfluencer?** \n**expl...,0
3,3,7187126075129234438,officialdanfx,https://www.tiktok.com/@officialdanfx,Dan Fx,False,“🚀 Get Funded & Start Trading Today! 👇\n📲 Tap ...,https://linktr.ee/Officialdanfx,https://p16-common-sign-va.tiktokcdn-us.com/to...,https://p16-common-sign-va.tiktokcdn-us.com/to...,False,NG,,False,208.0,23.0,125800.0,971900.0,483.0,0.0,False,,,,,2025-03-14 18:19:23.379150+00:00,Creation Date: 2025-03-14 10:55:57+00:00\nVide...,You will be presented with a series of questio...,You are analyzing a social media profile on Ti...,**question: Is this a finfluencer?** \n**expl...,1
4,4,7038449620599342106,modern__trading,https://www.tiktok.com/@modern__trading,MODERN.TRADING.OFFCIAL,False,📈 Professional Trader 〽️\n👇🌹Come To My Website 🔥👇,,https://p16-common-sign-va.tiktokcdn-us.com/to...,https://p16-common-sign-va.tiktokcdn-us.com/to...,False,PK,,False,3.0,1.0,72000.0,514300.0,175.0,0.0,False,,,,,2025-03-14 18:19:23.379150+00:00,Creation Date: 2025-03-14 14:21:39+00:00\nVide...,You will be presented with a series of questio...,You are analyzing a social media profile on Ti...,**question: Is this a finfluencer?** \n**expl...,1


# Preprocess Interview Data

In [None]:
def extract_llm_responses(text):
    """
    Flatten one raw LLM identification response into a Series of answer fields.

    The response is split on blank lines into one block per question. Inside a
    block every field is expected in the form ``**<field>: <content>**``; the
    fields looked for are question, explanation, symbol, category, speculation
    and value. Each non-empty field becomes one entry labelled
    "<question> - <field>".

    Args:
        text (str): Raw LLM response for a single profile. Non-string input
            (e.g. the NaN that read_csv produces for an empty cell) yields an
            empty Series.

    Returns:
        pd.Series: Extracted field contents keyed by "<question> - <field>",
        in the order they appear in the response.
    """
    # A missing response has nothing to split or search
    if not isinstance(text, str):
        return pd.Series(dtype=object)

    answer_fields = ('explanation', 'symbol', 'category', 'speculation', 'value')

    flattened = {}
    # Blank lines separate the per-question blocks
    for block in text.split('\n\n'):
        question = re.search(r'\*\*question: (.*?)\*\*', block, re.DOTALL)
        # A block without a question header keeps the prefix None, so its fields
        # end up under "None - <field>" rather than being dropped silently.
        question_prefix = question.group(1) if question else None

        for field in answer_fields:
            # DOTALL lets a field's content span several lines within the block
            found = re.search(rf'\*\*{field}: (.*?)\*\*', block, re.DOTALL)
            if found and found.group(1):
                flattened[f'{question_prefix} - {field}'] = found.group(1)

    return pd.Series(flattened, dtype=object)


# This cell must run before the evaluation: it creates the
# 'Is this a finfluencer? - category' column that calculate_metrics reads.
extracted_results = identification_results["identification_llm_response"].apply(extract_llm_responses)
# Drop columns left over from an earlier run of this cell so that re-running it
# replaces the extracted fields instead of appending duplicate columns.
identification_results = pd.concat(
    [
        identification_results.drop(columns=extracted_results.columns, errors="ignore"),
        extracted_results,
    ],
    axis=1,
)
identification_results.head()

Unnamed: 0,custom_id,id,profile,profileUrl,nickName,verified,signature,bioLink,originalAvatarUrl,avatar,privateAccount,region,roomId,ttSeller,following,friends,fans,heart,video,digg,commerceUserInfo.commerceUser,commerceUserInfo.downLoadLink.android,commerceUserInfo.downLoadLink.ios,commerceUserInfo.category,commerceUserInfo.categoryButton,extractionTime,transcripts_combined,identification_user_prompt,identification_system_prompt,identification_llm_response,Finfluencer,Is this a finfluencer? - explanation,Is this a finfluencer? - symbol,Is this a finfluencer? - category,Is this a finfluencer? - speculation,"Indicate on a scale of 0 to 100, how influential this influencer is – 0 means not at all influential and 100 means very influential with millions of followers and mainstream recognition? - explanation","Indicate on a scale of 0 to 100, how influential this influencer is – 0 means not at all influential and 100 means very influential with millions of followers and mainstream recognition? - speculation","Indicate on a scale of 0 to 100, how influential this influencer is – 0 means not at all influential and 100 means very influential with millions of followers and mainstream recognition? - value","Indicate on a scale of 0 to 100, how credible or authoritative this influencer is – 0 means not at all credible or authoritative and 100 means very credible and authoritative? - explanation","Indicate on a scale of 0 to 100, how credible or authoritative this influencer is – 0 means not at all credible or authoritative and 100 means very credible and authoritative? - speculation","Indicate on a scale of 0 to 100, how credible or authoritative this influencer is – 0 means not at all credible or authoritative and 100 means very credible and authoritative? - value",Which of these areas of finance are the primary focus of the influencer’s posts? - explanation,Which of these areas of finance are the primary focus of the influencer’s posts? 
- symbol,Which of these areas of finance are the primary focus of the influencer’s posts? - category,Which of these areas of finance are the primary focus of the influencer’s posts? - speculation,"Indicate on a scale of 0 to 100, how would you rate the quality of this influencer's individual stock predictions – 0 means very low quality and 100 means very high quality? - explanation","Indicate on a scale of 0 to 100, how would you rate the quality of this influencer's individual stock predictions – 0 means very low quality and 100 means very high quality? - speculation","Indicate on a scale of 0 to 100, how would you rate the quality of this influencer's individual stock predictions – 0 means very low quality and 100 means very high quality? - value","Indicate on a scale of 0 to 100, how would you rate the quality of this influencer's evaluation of market sentiment – 0 means very low quality and 100 means very high quality? - explanation","Indicate on a scale of 0 to 100, how would you rate the quality of this influencer's evaluation of market sentiment – 0 means very low quality and 100 means very high quality? - speculation","Indicate on a scale of 0 to 100, how would you rate the quality of this influencer's evaluation of market sentiment – 0 means very low quality and 100 means very high quality? - value","Indicate on a scale of 0 to 100, how would you rate the quality of this influencer's broader evaluation of the economy – 0 means very low quality and 100 means very high quality? - explanation","Indicate on a scale of 0 to 100, how would you rate the quality of this influencer's broader evaluation of the economy – 0 means very low quality and 100 means very high quality? - speculation","Indicate on a scale of 0 to 100, how would you rate the quality of this influencer's broader evaluation of the economy – 0 means very low quality and 100 means very high quality? - value",Who is the finfluencer’s target audience? 
- explanation,Who is the finfluencer’s target audience? - symbol,Who is the finfluencer’s target audience? - category,Who is the finfluencer’s target audience? - speculation
0,0,6870704589100942341,caprice4l,https://www.tiktok.com/@caprice4l,Caprice,False,I motivate the youth,https://linktr.ee/caprrice,https://p16-sign.tiktokcdn-us.com/tos-useast5-...,https://p16-sign.tiktokcdn-us.com/tos-useast5-...,False,US,,False,284.0,232.0,262700.0,9400000.0,592.0,0.0,False,,,,,2025-03-14 17:45:19.606972+00:00,Creation Date: 2025-02-21 01:05:09+00:00\nVide...,You will be presented with a series of questio...,You are analyzing a social media profile on Ti...,**question: Is this a finfluencer?** \n**expl...,0,The profile does not explicitly focus on finan...,A2),No,10,"The user has 262,700 followers, which is a sig...",20,60,The profile does not provide any credentials o...,30,20,The content does not focus on any specific are...,B7),Other Personal Finances,10,There is no evidence of stock predictions or f...,0,0,The profile does not provide any content relat...,0,0,There is no content related to economic evalua...,0,0,The content appears to be lifestyle and person...,C4),Others,10
1,1,7077615113897280518,tradingjunkies.store,https://www.tiktok.com/@tradingjunkies.store,TradingJunkies.Store,False,Learn to trade \n👇👇👇,https://tradingjunkies.store,https://p16-common-sign-va.tiktokcdn-us.com/to...,https://p16-common-sign-va.tiktokcdn-us.com/to...,False,GB,,False,1.0,1.0,884300.0,10500000.0,2109.0,0.0,True,,,Finance & Investing,False,2025-03-14 18:19:23.379150+00:00,Creation Date: 2025-03-14 17:45:45+00:00\nVide...,You will be presented with a series of questio...,You are analyzing a social media profile on Ti...,**question: Is this a finfluencer?** \n**expl...,1,"The profile ""tradingjunkies.store"" is focused ...",A1),Yes,10,"The profile has 884,300 followers, which is a ...",20,75,The profile does not provide specific credenti...,50,40,The primary focus of the posts is on stock tra...,B1),Stock Trading and Equities,10,There is no specific information provided abou...,70,30,The profile does not provide detailed evaluati...,70,25,The profile does not provide broader evaluatio...,80,20,The target audience appears to be young invest...,C1),Young Investors,30
2,2,6804917131903255558,surthycooks,https://www.tiktok.com/@surthycooks,Surthycooks,True,🇻🇪 🇱🇧 \nالسلطانة تطبخ\n\nSurthycooks@voilatale...,,https://p19-common-sign-va.tiktokcdn-us.com/to...,https://p19-common-sign-va.tiktokcdn-us.com/to...,False,VE,,False,184.0,98.0,33100000.0,959900000.0,1281.0,0.0,False,,,,,2025-02-19 14:32:05.027700+00:00,Creation Date: 2025-02-18 23:22:16+00:00\nVide...,You will be presented with a series of questio...,You are analyzing a social media profile on Ti...,**question: Is this a finfluencer?** \n**expl...,0,"The profile ""surthycooks"" does not exhibit any...",A2),No,0,The profile has 33.1 million followers and a h...,0,95,"The profile does not focus on finance, so cred...",0,80,The profile does not focus on finance; it is c...,B7),Other Personal Finances,0,The profile does not provide any stock predict...,0,0,The profile does not engage in evaluating mark...,0,0,The profile does not provide any evaluation of...,0,0,"The profile is not a financial influencer, so ...",C4),Others,0
3,3,7187126075129234438,officialdanfx,https://www.tiktok.com/@officialdanfx,Dan Fx,False,“🚀 Get Funded & Start Trading Today! 👇\n📲 Tap ...,https://linktr.ee/Officialdanfx,https://p16-common-sign-va.tiktokcdn-us.com/to...,https://p16-common-sign-va.tiktokcdn-us.com/to...,False,NG,,False,208.0,23.0,125800.0,971900.0,483.0,0.0,False,,,,,2025-03-14 18:19:23.379150+00:00,Creation Date: 2025-03-14 10:55:57+00:00\nVide...,You will be presented with a series of questio...,You are analyzing a social media profile on Ti...,**question: Is this a finfluencer?** \n**expl...,1,The profile of Dan Fx includes promotional con...,A1),Yes,10,"The influencer has 125,800 followers and a tot...",20,50,The influencer provides free courses and strat...,30,40,The primary focus of the influencer's posts is...,B1),Stock Trading and Equities,0,The influencer's content does not focus on ind...,100,0,The influencer's content is primarily focused ...,70,20,The influencer's content does not provide a br...,100,0,The content is geared towards beginners and th...,C1),Young Investors,20
4,4,7038449620599342106,modern__trading,https://www.tiktok.com/@modern__trading,MODERN.TRADING.OFFCIAL,False,📈 Professional Trader 〽️\n👇🌹Come To My Website 🔥👇,,https://p16-common-sign-va.tiktokcdn-us.com/to...,https://p16-common-sign-va.tiktokcdn-us.com/to...,False,PK,,False,3.0,1.0,72000.0,514300.0,175.0,0.0,False,,,,,2025-03-14 18:19:23.379150+00:00,Creation Date: 2025-03-14 14:21:39+00:00\nVide...,You will be presented with a series of questio...,You are analyzing a social media profile on Ti...,**question: Is this a finfluencer?** \n**expl...,1,"The profile ""MODERN.TRADING.OFFCIAL"" has a str...",A1),Yes,10,"The profile has 72,000 followers, which is a m...",20,40,"The profile claims to be a ""Professional Trade...",40,30,The primary focus of the influencer's posts is...,B5),Cryptocurrency,10,The profile does not provide specific stock pr...,70,10,The influencer provides some insights into mar...,50,20,The profile does not provide a broader evaluat...,80,10,The target audience appears to be young invest...,C1),Young Investors,30


# Perform Identification Evaluation

In [7]:
def calculate_metrics(df):
    """
    Score the LLM's finfluencer classification against the ground-truth labels.

    The prediction column 'Is this a finfluencer? - category' is converted to
    1 ('Yes') / 0 ('No'); any other value becomes NaN. Rows with a missing label
    or prediction are excluded from scoring.

    NOTE: the converted prediction column is written back into ``df`` itself
    (kept for backward compatibility: the save step later persists this frame).
    The conversion accepts values that are already 0/1, so calling the function
    again on the same frame gives the same result instead of turning every
    prediction into NaN.

    Args:
        df (pd.DataFrame): Frame with the columns 'Finfluencer' (ground truth)
            and 'Is this a finfluencer? - category' (LLM answer).

    Returns:
        tuple: (accuracy, macro-averaged F1, AUC) as computed by scikit-learn.

    Raises:
        KeyError: If either required column is missing.
        ValueError: If no row has both a label and a usable prediction.
    """
    truth_col = 'Finfluencer'
    pred_col = 'Is this a finfluencer? - category'
    answer_to_binary = {'Yes': 1, 'No': 0}

    def _to_binary(answer):
        # Textual answers: only the exact strings 'Yes' / 'No' are recognised
        if isinstance(answer, str):
            return answer_to_binary.get(answer, float('nan'))
        # Missing values, and anything that is not an already-encoded 0/1
        if pd.isna(answer) or answer not in (0, 1):
            return float('nan')
        # Already encoded by a previous call: pass through unchanged
        return int(answer)

    # Convert the prediction column to binary values (in place on the caller's frame)
    df[pred_col] = df[pred_col].map(_to_binary)

    # Score only rows where both the label and the prediction are present
    scored = df.dropna(subset=[truth_col, pred_col])
    if scored.empty:
        raise ValueError(
            f"No rows with both '{truth_col}' and a 'Yes'/'No' value in '{pred_col}' to evaluate."
        )

    # Extract the true labels and predicted labels
    y_true = scored[truth_col]
    y_pred = scored[pred_col]

    # Calculate accuracy
    accuracy = accuracy_score(y_true, y_pred)

    # Calculate macro-averaged F1 score
    macro_f1 = f1_score(y_true, y_pred, average='macro')

    # Calculate AUC score. The inputs are hard 0/1 predictions rather than scores,
    # so this value equals balanced accuracy, i.e. (TPR + TNR) / 2.
    auc = roc_auc_score(y_true, y_pred)

    return accuracy, macro_f1, auc

# Evaluate once, keep the individual scores available by name, then report them
metric_values = calculate_metrics(identification_results)
accuracy, macro_f1, auc = metric_values
metric_labels = ('Accuracy', 'Macro-averaged F1 score', 'AUC score')
for metric_label, metric_value in zip(metric_labels, metric_values):
    print(f'{metric_label}: {metric_value:.5f}')

Accuracy: 0.96474
Macro-averaged F1 score: 0.96472
AUC score: 0.96496


# Save Formatted Interview Results

In [None]:
# Persist the frame as it stands at this point, without writing the row index
formatted_results_path = "../data/market-signals-finfluencer/profile_metadata_post_identification_formatted.csv"
identification_results.to_csv(formatted_results_path, index=False)