# Libraries

In [1]:
# Standard libs
import os
import pandas as pd

# API interaction
from urllib.request import urlopen
import json
import time

# UI
from IPython.display import clear_output

# Notebook Settings

In [2]:
# Pandas: show every column when a wide frame is displayed.
pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)

# Save data location.
# The default is the original machine-specific path; set the SALTY_DATA_PATH
# environment variable to point the notebook at a different CSV without
# editing this cell.
save_data_complete = os.environ.get(
    'SALTY_DATA_PATH',
    'C:/Users/nuke2/Desktop/Python/Projects/Data/SaltyBetter/salty_data_complete.csv',
)

# SaltyData API
# `match_url` returns the current match; a fighter id is appended to
# `fighter_url` to request a single fighter.
match_url = "https://salty-boy.com/current-match"
fighter_url = 'https://salty-boy.com/fighters?id='

# SaveData

In [3]:
# Read savedata
# Load the CSV stored at `save_data_complete` into a DataFrame.
save_data = pd.read_csv(save_data_complete)
# Report how many rows were loaded.
print(f"Rows: {len(save_data)}")

Rows: 40707


In [4]:
# Show the loaded frame as this cell's output.
save_data

Unnamed: 0,name,id,best_streak,prev_tier,elo,tier,tier_elo,average_bet,total_matches,win_rate,fighter_blue,fighter_red,bet_blue,bet_red,match_id,match_format,streak_blue,streak_red,match_tier,winner,winner_binary,away,name_other,id_other,best_streak_other,prev_tier_other,elo_other,tier_other,tier_elo_other,average_bet_other,total_matches_other,win_rate_other
0,Tannhauser,804,5,S,1560,S,1559,10870713.45,44,0.61,804,803,15182145,8105369,452,matchmaking,-2,1,S,804,0.0,803.0,Shin meep140,803.0,2.0,S,1338.0,S,1338.0,4979841.32,37.0,0.27
1,Tannhauser,804,5,S,1560,S,1559,10870713.45,44,0.61,884,804,2335392,14252526,9198,matchmaking,-14,1,S,804,0.0,884.0,Diego brando,884.0,14.0,S,1544.0,S,1433.0,4989952.13,39.0,0.64
2,Tannhauser,804,5,S,1560,S,1559,10870713.45,44,0.61,804,6375,3225051,10389988,9532,matchmaking,2,6,S,6375,1.0,6375.0,Shindo tamaki,6375.0,14.0,S,1702.0,S,1699.0,18188514.31,42.0,0.79
3,Tannhauser,804,5,S,1560,S,1559,10870713.45,44,0.61,769,804,7762748,5408573,15958,matchmaking,3,-1,S,769,1.0,769.0,Element kyo,769.0,9.0,S,1623.0,S,1620.0,8723817.10,50.0,0.70
4,Tannhauser,804,5,S,1560,S,1559,10870713.45,44,0.61,804,1490,13472658,10784491,20616,matchmaking,-2,1,S,804,0.0,1490.0,Skolldir,1490.0,6.0,S,1587.0,S,1586.0,7656194.80,55.0,0.64
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40702,Yuka,6693,13,S,1686,S,1686,14027290.90,49,0.78,2901,6693,1423695,6935818,122681,matchmaking,1,2,S,6693,0.0,2901.0,Yamazaki rb,2901.0,4.0,S,1329.0,S,1328.0,4044211.33,46.0,0.30
40703,Yuka,6693,13,S,1686,S,1686,14027290.90,49,0.78,6693,937,5471229,1247785,125123,matchmaking,3,1,S,6693,0.0,937.0,Orochi-bs,937.0,8.0,S,1621.0,S,1612.0,6681492.44,62.0,0.65
40704,Yuka,6693,13,S,1686,S,1686,14027290.90,49,0.78,6693,1959,920639,1896971,138437,tournament,6,-1,S,6693,0.0,1959.0,New rubber soul,1959.0,7.0,S,1581.0,S,1580.0,10840093.64,61.0,0.66
40705,Yuka,6693,13,S,1686,S,1686,14027290.90,49,0.78,2178,6693,1081341,1160898,138443,tournament,3,7,S,6693,0.0,2178.0,Shin akuma svc chaos,2178.0,11.0,S,1710.0,S,1706.0,21072117.53,58.0,0.79


# Functions

#### Get winner column (binary)

In [5]:
# Get winner (binary)
def get_winner(row):
    """Return the binary outcome flag for the fighter described by ``row``.

    Parameters
    ----------
    row : object with ``id`` and ``winner`` attributes
        ``id`` is the fighter the row belongs to, ``winner`` the id of the
        fighter that won the match.

    Returns
    -------
    int
        0 when ``row.id`` equals ``row.winner`` (this fighter won),
        1 otherwise (this fighter lost).
    """
    fighter_won = row.id == row.winner
    return 0 if fighter_won else 1

#### Get other fighter id to fetch data later

In [6]:
# Get other fighter id to get other fighting data
def get_other_fighter(row):
    """Return the id of the opponent of the fighter described by ``row``.

    Parameters
    ----------
    row : object with ``id``, ``fighter_blue`` and ``fighter_red`` attributes

    Returns
    -------
    The red corner id when ``row.id`` is the blue corner fighter, the blue
    corner id when ``row.id`` is the red corner fighter, and ``None`` when
    ``row.id`` matches neither corner.
    """
    fighter = row.id
    blue_corner = row.fighter_blue
    red_corner = row.fighter_red

    # The blue corner is checked first, so it wins if both corners match.
    if fighter == blue_corner:
        return red_corner
    if fighter == red_corner:
        return blue_corner
    return None

#### Get fighter data from id input

In [7]:
# Define a dictionary that maps column names to their corresponding keys in the
# fighter data JSON. The insertion order is the order of the returned values.
columns_fighter = {
    'name': 'name', # 0
    'id': 'id', # 1
    'best_streak': 'best_streak', # 2
    'prev_tier': 'prev_tier', # 3
    'elo': 'elo', # 4
    'tier': 'tier', # 5
    'tier_elo': 'tier_elo', # 6
    'average_bet': 'average_bet', # 7
    'total_matches': 'total_matches', # 8
    'win_rate': 'win_rate', # 9
}

# Extract the column names from the dictionary as a list.
data_extraction_fighters = list(columns_fighter.keys())

# JSON keys that are read from the nested 'stats' object instead of the top
# level of the fighter payload.
_FIGHTER_STATS_KEYS = ('average_bet', 'total_matches', 'win_rate')

def get_fighter_data(fighter_id):
    """Download one fighter from the API and return its fields as a list.

    Parameters
    ----------
    fighter_id : int or float
        Fighter id; converted with ``int()`` before it is appended to
        ``fighter_url`` (ids read back from the CSV arrive as floats).

    Returns
    -------
    list
        One value per entry of ``columns_fighter``, in that order. The last
        three (``average_bet``, ``total_matches``, ``win_rate``) come from the
        payload's ``'stats'`` object, the others from its top level.

    Raises
    ------
    ValueError, TypeError
        If ``fighter_id`` cannot be converted to ``int`` (e.g. NaN or None).
    KeyError
        If the payload lacks ``'stats'`` or one of the expected fields.
    Any exception raised by ``urlopen`` or ``json.loads`` is propagated.
    """
    # Wait half a second between requests to not spam the server.
    time.sleep(0.5)

    fighter_id = int(fighter_id)

    # Open the URL for the given ID and load the JSON. The context manager
    # closes the HTTP response even when decoding fails.
    with urlopen(fighter_url + str(fighter_id)) as response_fighter:
        fighter_data_json = json.loads(response_fighter.read())

    # Build the row once, picking each value from the object that holds it.
    stats = fighter_data_json['stats']
    fighter_row = []
    for json_key in columns_fighter.values():
        source = stats if json_key in _FIGHTER_STATS_KEYS else fighter_data_json
        fighter_row.append(source[json_key])

    print(f"[INFO] -- Getting data for fighter {fighter_id}")
    return fighter_row

#### Debug

In [8]:
# Debug: request the current match once and print both fighter names.
# The context manager closes the HTTP response once the payload is read.
with urlopen(match_url) as response_match:
    match_data_json = json.loads(response_match.read())
print(f"Blue: {match_data_json['fighter_blue']}")
print(f"Red: {match_data_json['fighter_red']}")

Blue: Another igniz EX2
Red: Slender man


#### Update match data (save file)

In [9]:
# Fighter fields read from the top level of a 'fighter_*_info' object.
_FIGHTER_INFO_FIELDS = ('name', 'id', 'best_streak', 'prev_tier', 'elo', 'tier', 'tier_elo')
# Fighter fields read from the nested 'stats' object.
_FIGHTER_STATS_FIELDS = ('average_bet', 'total_matches', 'win_rate')
# Output column -> key inside each entry of the fighter's 'matches' list.
_MATCH_FIELDS = {
    'fighter_blue': 'fighter_blue',
    'fighter_red': 'fighter_red',
    'bet_blue': 'bet_blue',
    'bet_red': 'bet_red',
    'match_id': 'id',
    'match_format': 'match_format',
    'streak_blue': 'streak_blue',
    'streak_red': 'streak_red',
    'match_tier': 'tier',
    'winner': 'winner',
}
# Column order of a freshly extracted match row.
_MATCH_COLUMNS = [*_FIGHTER_INFO_FIELDS, *_FIGHTER_STATS_FIELDS, *_MATCH_FIELDS]
# Columns that hold the opponent's fighter data, in get_fighter_data() order.
_OTHER_FIGHTER_COLUMNS = [
    f"{field}_other" for field in (*_FIGHTER_INFO_FIELDS, *_FIGHTER_STATS_FIELDS)
]


def _fetch_json(url):
    """Download ``url`` and return the decoded JSON payload."""
    # The context manager closes the HTTP response even when decoding fails.
    with urlopen(url) as response:
        return json.loads(response.read())


def _collect_new_matches(match_data_json, known_match_ids):
    """Return one dict per match in the payload whose id is not in ``known_match_ids``."""
    seen_match_ids = set(known_match_ids)
    records = []
    for side in ('fighter_blue_info', 'fighter_red_info'):
        fighter = match_data_json[side]
        if fighter is None:
            continue
        for match in fighter['matches']:
            match_id = match[_MATCH_FIELDS['match_id']]
            # A match listed under both fighters is stored once, from the
            # point of view of the first side that lists it.
            if match_id in seen_match_ids:
                continue
            seen_match_ids.add(match_id)

            record = {field: fighter[field] for field in _FIGHTER_INFO_FIELDS}
            record.update({field: fighter['stats'][field] for field in _FIGHTER_STATS_FIELDS})
            record.update({column: match[key] for column, key in _MATCH_FIELDS.items()})
            records.append(record)
    return records


def _fill_opponent_data(match_frame):
    """Fill the ``*_other`` columns of rows whose ``name_other`` is missing.

    Mutates ``match_frame`` in place. Returns the number of rows skipped
    because their ``away`` id is missing or cannot be converted to ``int``.
    """
    pending_rows = match_frame.index[match_frame['name_other'].isna()]
    fetched = {}  # opponent id -> fighter row, so each opponent is downloaded once
    skipped_rows = 0
    for idx in pending_rows:
        try:
            away_id = int(match_frame.at[idx, 'away'])
        except (TypeError, ValueError):
            # A row without a usable opponent id must not block the other rows.
            skipped_rows += 1
            continue
        if away_id not in fetched:
            clear_output(wait=True)
            fetched[away_id] = get_fighter_data(away_id)
        # Label-based assignment does not depend on the columns' positions.
        match_frame.loc[idx, _OTHER_FIGHTER_COLUMNS] = fetched[away_id]
    return skipped_rows


def update_saved_data(poll_delay=30):
    """Append unseen matches from the current-match endpoint to the save file.

    Steps: wait ``poll_delay`` seconds, download ``match_url``, read the CSV at
    ``save_data_complete``, add every match of both fighters whose ``match_id``
    is not stored yet (with ``winner_binary`` and ``away`` computed by
    ``get_winner`` / ``get_other_fighter``), download opponent data for rows
    whose ``name_other`` is missing, and write the CSV back.

    Opponent fetching is best effort: an ``Exception`` raised while fetching
    stops the fetching for this call, and whatever was gathered so far is still
    written. A ``KeyboardInterrupt`` is not swallowed; the CSV is written and
    the interrupt then propagates to the caller.

    Parameters
    ----------
    poll_delay : float, default 30
        Seconds to wait before contacting the server.

    Returns
    -------
    None
    """
    # Wait X seconds to not spam the server
    time.sleep(poll_delay)

    match_data_json = _fetch_json(match_url)

    # Read save data
    save_data = pd.read_csv(save_data_complete)

    new_records = _collect_new_matches(match_data_json, save_data['match_id'].tolist())

    if new_records:
        # Build the frame once instead of concatenating inside the loop.
        match_data = pd.DataFrame(new_records, columns=_MATCH_COLUMNS)
        match_data['winner_binary'] = match_data.apply(get_winner, axis = 1)
        match_data['away'] = match_data.apply(get_other_fighter, axis = 1)
        updated_match_data = pd.concat([save_data, match_data], axis=0).reset_index(drop=True)
    else:
        updated_match_data = save_data.reset_index(drop=True)

    skipped_rows = 0
    fetch_error = None
    try:
        print("[INFO] -- Updating dataframe")
        time.sleep(2)
        skipped_rows = _fill_opponent_data(updated_match_data)
    except Exception as exc:
        # Keep the reason so it can be shown after the output is cleared.
        fetch_error = exc
        print("[INFO] -- Skipped updates")
    finally:
        # Persist new matches and any opponent data gathered so far, also
        # when the user interrupts the fetch loop.
        updated_match_data.to_csv(save_data_complete, index=False)

    clear_output(wait=True)
    print(f"duplicated matches: {sum(updated_match_data.duplicated(subset=['match_id']))}")
    print(f"rows: {len(updated_match_data)}")
    if fetch_error is not None:
        print(f"[WARN] -- Opponent update stopped early: {fetch_error!r}")
    if skipped_rows:
        print(f"[WARN] -- {skipped_rows} row(s) without a usable 'away' id were skipped")

In [None]:
# Call update_saved_data() over and over until a KeyboardInterrupt
# (e.g. interrupting the kernel) stops the loop.
try:
    while True:
        update_saved_data()
except KeyboardInterrupt:
    # End the cell quietly instead of showing a traceback.
    pass

duplicated matches: 0
rows: 43191
[INFO] -- Updating dataframe


# --------------------

# --------------------

# Cleaning

# Extras

In [None]:
# Maps each opponent column name ("<key>_other") to the fighter JSON key it is
# filled from; the order matches the order of the stored columns.
columns_fighter_data = {
    f"{json_key}_other": json_key
    for json_key in (
        'name', # 0
        'id', # 1
        'best_streak', # 2
        'prev_tier', # 3
        'elo', # 4
        'tier', # 5
        'tier_elo', # 6
        'average_bet', # 7
        'total_matches', # 8
        'win_rate', # 9
    )
}

# Opponent column names only, in the same order.
fighter_other_names = [column for column in columns_fighter_data]

# Disabled: initialise every opponent column with a placeholder value.
# for idx in fighter_other_names:
#     match_data[idx] = -421