In [2]:
!pip install -q pandas datasets

In [3]:
import pandas as pd 
import json 

# Columns that are expected to hold numeric values.
numeric_columns = [
    'normal_avg_bet', 'normal_count', 'normal_bet', 'normal_win',
    'buy_avg_bet', 'buy_count', 'buy_bet', 'buy_win',
    'total_net',
]

# Load the raw CSV export.
df = pd.read_csv('./rawdata/trip/20231205.csv')

# Convert each listed column to a numeric dtype; values that cannot be parsed
# become NaN (errors='coerce') instead of raising.
df[numeric_columns] = df[numeric_columns].apply(
    lambda column: pd.to_numeric(column, errors='coerce')
)


In [4]:
def create_analytical_dialogue(row):
    """Render one record of slot-game statistics as a scripted dialogue.

    Args:
        row: Mapping-like record (for example a DataFrame row) supporting
            ``row[key]`` for ``slotId``, ``normal_count``, ``normal_avg_bet``,
            ``normal_bet``, ``normal_win``, ``buy_count``, ``buy_avg_bet``,
            ``buy_bet``, ``buy_win`` and ``total_net``.

    Returns:
        str: A multi-turn conversation whose turns are separated by newlines
        and prefixed with ``###Human:`` / ``###Assistant:``.
    """
    total_net = row['total_net']
    if total_net > 0:
        net_result = "profit"
    elif total_net == 0:
        # A net of exactly zero is neither a profit nor a loss.
        net_result = "break-even result"
    else:
        # Negative values land here; so does NaN, because every comparison
        # against NaN is False.
        net_result = "loss"

    # The dialogue reports the special-bet share "of your total bets", so the
    # denominator must be the combined stake rather than the regular-round
    # stake alone. This keeps the figure within 0-100% and gives 100% (not 0)
    # when only special bets were placed.
    total_bet = row['normal_bet'] + row['buy_bet']
    special_bet_ratio = round((row['buy_bet'] / total_bet) * 100, 2) if total_bet > 0 else 0

    # NOTE(review): the closing "insight" turn is fixed text and is emitted
    # regardless of how regular and special bets actually compare.
    dialogue = (
        f"###Human: Can you analyze my slot game data?\n"
        f"###Assistant: Sure, please provide the details.\n"
        f"###Human: I played on slot {row['slotId']}. I had {row['normal_count']} regular rounds with an average bet of {row['normal_avg_bet']}. "
        f"My total bet was {row['normal_bet']} and I won {row['normal_win']}.\n"
        f"###Assistant: Did you make any special bets?\n"
        f"###Human: Yes, I made {row['buy_count']} special bets with an average of {row['buy_avg_bet']} per bet, totaling {row['buy_bet']} and winning {row['buy_win']}.\n"
        f"###Assistant: Based on your data, your special bets constituted {special_bet_ratio}% of your total bets. You ended the day with a {net_result}, with a total net of {row['total_net']}.\n"
        f"###Human: What insights can you give me about my betting strategy?\n"
        f"###Assistant: Your strategy shows a higher engagement in regular rounds compared to special bets. It's advisable to balance your bets and monitor the outcomes closely to optimize your strategy for better results."
    )
    return dialogue

# Build one dialogue string per row and store it in a new column, then
# preview the first few entries.
dialogue_column = 'analytical_dialogue'
df[dialogue_column] = df.apply(create_analytical_dialogue, axis=1)
print(df[dialogue_column].head())



0    ###Human: Can you analyze my slot game data?\n...
1    ###Human: Can you analyze my slot game data?\n...
2    ###Human: Can you analyze my slot game data?\n...
3    ###Human: Can you analyze my slot game data?\n...
4    ###Human: Can you analyze my slot game data?\n...
Name: analytical_dialogue, dtype: object


In [5]:
# Collect the generated dialogues into a plain Python list.
train_data = list(df['analytical_dialogue'])

In [7]:
# Write the dialogues as JSON Lines: one {"text": ...} object per line.
with open('20231215.jsonl', 'w') as jsonl_file:
    jsonl_file.writelines(
        json.dumps({"text": dialogue}) + '\n' for dialogue in train_data
    )